Run download_data.Rmd and percentage_of_regional_richness.Rmd First!
fetch_city_data_for <- function(pool_name, include_city_name = F) {
results_filename <- paste(paste(pool_name, 'city', 'richness', 'intercept', sep = "_"), "csv", sep = ".")
results <- read_csv(results_filename)
joined <- left_join(city_data, results)
pool_size_col_name <- paste(pool_name, 'pool', 'size', sep = "_")
required_columns <- c("response", pool_size_col_name, "population_growth", "rainfall_monthly_min", "rainfall_annual_average", "rainfall_monthly_max", "temperature_annual_average", "temperature_monthly_min", "temperature_monthly_max", "happiness_negative_effect", "happiness_positive_effect", "happiness_future_life", "number_of_biomes", "realm", "biome_name", "region_20km_includes_estuary", "region_50km_includes_estuary", "region_100km_includes_estuary", "city_includes_estuary", "region_20km_average_pop_density", "region_50km_average_pop_density", "region_100km_average_pop_density", "city_max_pop_density", "city_average_pop_density", "mean_population_exposure_to_pm2_5_2019", "region_20km_cultivated", "region_20km_urban", "region_50km_cultivated", "region_50km_urban", "region_100km_cultivated", "region_100km_urban", "region_20km_elevation_delta", "region_20km_mean_elevation", "region_50km_elevation_delta", "region_50km_mean_elevation", "region_100km_elevation_delta", "region_100km_mean_elevation", "city_elevation_delta", "city_mean_elevation", "urban", "shrubs", "permanent_water", "open_forest", "herbaceous_wetland", "herbaceous_vegetation", "cultivated", "closed_forest", "share_of_population_within_400m_of_open_space", "percentage_urban_area_as_streets", "percentage_urban_area_as_open_public_spaces_and_streets", "percentage_urban_area_as_open_public_spaces", "city_gdp_per_population")
if (include_city_name) {
required_columns <- append(c("name"), required_columns)
}
joined[,required_columns]
}
merlin_city_data <- fetch_city_data_for('merlin')
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
name = col_character(),
response = col_double()
)
Joining, by = "name"
merlin_city_data
library(randomForest)
randomForest 4.6-14
Type rfNews() to see new features/changes/bug fixes.
library(reshape2)
library(rpart)
library(ggplot2)
Attaching package: ‘ggplot2’
The following object is masked from ‘package:randomForest’:
margin
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──
✓ tibble 3.1.2 ✓ dplyr 1.0.7
✓ tidyr 1.1.3 ✓ stringr 1.4.0
✓ readr 1.4.0 ✓ forcats 0.5.1
✓ purrr 0.3.4
── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
x dplyr::combine() masks randomForest::combine()
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
x ggplot2::margin() masks randomForest::margin()
merlin_city_data_fixed <- rfImpute(response ~ ., merlin_city_data)
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 3.71 20.58 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 3.979 22.07 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 4.115 22.83 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 4.001 22.19 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 4.161 23.08 |
merlin_city_data_fixed <- rfImpute(response ~ ., merlin_city_data)
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 3.902 21.65 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 3.999 22.19 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 3.921 21.75 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 3.971 22.03 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 4.177 23.17 |
merlin_city_data_fixed
randomForest(response ~ ., merlin_city_data_fixed)
Call:
randomForest(formula = response ~ ., data = merlin_city_data_fixed)
Type of random forest: regression
Number of trees: 500
No. of variables tried at each split: 17
Mean of squared residuals: 16.40603
% Var explained: 8.99
select_variables_from_random_forest(merlin_city_data_fixed)
[1] "merlin_pool_size" "realm" "biome_name"
[4] "rainfall_monthly_min" "temperature_annual_average" "happiness_positive_effect"
[7] "region_20km_elevation_delta" "percentage_urban_area_as_open_public_spaces" "region_20km_urban"
[10] "region_50km_elevation_delta" "temperature_monthly_min" "region_20km_cultivated"
[13] "permanent_water" "region_50km_urban" "region_100km_cultivated"
[16] "shrubs" "city_gdp_per_population" "region_50km_cultivated"
[19] "region_100km_elevation_delta" "happiness_negative_effect" "region_100km_urban"
[22] "region_50km_average_pop_density" "region_20km_average_pop_density" "share_of_population_within_400m_of_open_space"
[25] "rainfall_annual_average" "city_average_pop_density" "herbaceous_wetland"
[28] "temperature_monthly_max" "region_100km_average_pop_density" "city_mean_elevation"
[31] "mean_population_exposure_to_pm2_5_2019" "rainfall_monthly_max" "happiness_future_life"
[34] "city_max_pop_density" "region_50km_mean_elevation" "cultivated"
[37] "region_20km_mean_elevation" "region_100km_mean_elevation" "urban"
[40] "population_growth" "percentage_urban_area_as_open_public_spaces_and_streets" "open_forest"
[43] "percentage_urban_area_as_streets" "closed_forest"
select_variables_from_random_forest(merlin_city_data_fixed_single_scale)
[1] "merlin_pool_size" "realm" "biome_name"
[4] "temperature_annual_average" "happiness_positive_effect" "region_20km_elevation_delta"
[7] "percentage_urban_area_as_open_public_spaces" "rainfall_monthly_min" "permanent_water"
[10] "temperature_monthly_min" "region_20km_urban" "shrubs"
[13] "region_20km_cultivated" "happiness_negative_effect" "share_of_population_within_400m_of_open_space"
[16] "temperature_monthly_max" "rainfall_monthly_max" "rainfall_annual_average"
[19] "happiness_future_life" "city_max_pop_density" "city_mean_elevation"
[22] "city_elevation_delta" "cultivated" "population_growth"
[25] "region_50km_mean_elevation" "percentage_urban_area_as_streets" "open_forest"
[28] "closed_forest"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size")])
[1] "Mean 18.3605779376349 , SD: 0.199971871406664 , Mean + SD: 18.5605498090416"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "realm")])
[1] "Mean 13.8483857782536 , SD: 0.167233775821336 , Mean + SD: 14.015619554075"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "realm", "biome_name")])
[1] "Mean 14.1417551442684 , SD: 0.160688673425095 , Mean + SD: 14.3024438176934"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "realm", "biome_name", "temperature_annual_average")])
[1] "Mean 14.5103292900374 , SD: 0.234962153093279 , Mean + SD: 14.7452914431307"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "realm", "biome_name", "temperature_annual_average", "happiness_positive_effect")])
[1] "Mean 14.7574657636192 , SD: 0.243199251750122 , Mean + SD: 15.0006650153693"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "realm", "biome_name", "temperature_annual_average", "happiness_positive_effect", "region_20km_elevation_delta")])
[1] "Mean 14.9256981639764 , SD: 0.240704529916054 , Mean + SD: 15.1664026938924"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "realm", "biome_name", "temperature_annual_average", "happiness_positive_effect", "region_20km_elevation_delta", "percentage_urban_area_as_open_public_spaces")])
[1] "Mean 14.788101572417 , SD: 0.263257912139604 , Mean + SD: 15.0513594845566"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "realm", "biome_name", "temperature_annual_average", "happiness_positive_effect", "region_20km_elevation_delta", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min")])
[1] "Mean 14.708125023514 , SD: 0.244893814930232 , Mean + SD: 14.9530188384442"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "realm", "biome_name", "temperature_annual_average", "happiness_positive_effect", "region_20km_elevation_delta", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "permanent_water")])
[1] "Mean 14.71026136053 , SD: 0.251430414275647 , Mean + SD: 14.9616917748057"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "realm", "biome_name", "temperature_annual_average", "happiness_positive_effect", "region_20km_elevation_delta", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "permanent_water", "temperature_monthly_min")])
[1] "Mean 15.0471745956683 , SD: 0.215420444110583 , Mean + SD: 15.2625950397789"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "realm", "biome_name", "temperature_annual_average", "happiness_positive_effect", "region_20km_elevation_delta", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "permanent_water", "temperature_monthly_min", "region_20km_urban")])
[1] "Mean 15.1350022423199 , SD: 0.339891929354689 , Mean + SD: 15.4748941716745"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "realm", "biome_name", "temperature_annual_average", "happiness_positive_effect", "region_20km_elevation_delta", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "permanent_water", "temperature_monthly_min", "region_20km_urban", "shrubs")])
[1] "Mean 15.1353707728019 , SD: 0.319439216090642 , Mean + SD: 15.4548099888925"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "realm", "biome_name", "temperature_annual_average", "happiness_positive_effect", "region_20km_elevation_delta", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "permanent_water", "temperature_monthly_min", "region_20km_urban", "shrubs", "region_20km_cultivated")])
[1] "Mean 15.169974474564 , SD: 0.270058572377652 , Mean + SD: 15.4400330469417"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "realm", "biome_name", "temperature_annual_average", "happiness_positive_effect", "region_20km_elevation_delta", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "permanent_water", "temperature_monthly_min", "region_20km_urban", "shrubs", "region_20km_cultivated", "happiness_negative_effect")])
[1] "Mean 15.2117893322605 , SD: 0.300179792841188 , Mean + SD: 15.5119691251017"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "realm", "biome_name", "temperature_annual_average", "happiness_positive_effect", "region_20km_elevation_delta", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "permanent_water", "temperature_monthly_min", "region_20km_urban", "shrubs", "region_20km_cultivated", "happiness_negative_effect", "share_of_population_within_400m_of_open_space")])
[1] "Mean 15.2800537211106 , SD: 0.287378331903091 , Mean + SD: 15.5674320530137"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "realm", "biome_name", "temperature_annual_average", "happiness_positive_effect", "region_20km_elevation_delta", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "permanent_water", "temperature_monthly_min", "region_20km_urban", "shrubs", "region_20km_cultivated", "happiness_negative_effect", "share_of_population_within_400m_of_open_space", "temperature_monthly_max")])
[1] "Mean 15.4080077907785 , SD: 0.299436746178816 , Mean + SD: 15.7074445369573"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "realm", "biome_name", "temperature_annual_average", "happiness_positive_effect", "region_20km_elevation_delta", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "permanent_water", "temperature_monthly_min", "region_20km_urban", "shrubs", "region_20km_cultivated", "happiness_negative_effect", "share_of_population_within_400m_of_open_space", "temperature_monthly_max", "rainfall_monthly_max")])
[1] "Mean 15.6416908542101 , SD: 0.24366735672925 , Mean + SD: 15.8853582109394"
create_fifty_rows_of_oob(merlin_city_data_fixed[,c("response", "merlin_pool_size", "realm", "biome_name", "temperature_annual_average", "happiness_positive_effect", "region_20km_elevation_delta", "percentage_urban_area_as_open_public_spaces", "rainfall_monthly_min", "permanent_water", "temperature_monthly_min", "region_20km_urban", "shrubs", "region_20km_cultivated", "happiness_negative_effect", "share_of_population_within_400m_of_open_space", "temperature_monthly_max", "rainfall_monthly_max", "rainfall_annual_average")])
[1] "Mean 15.6107006616448 , SD: 0.240063905776268 , Mean + SD: 15.850764567421"
“merlin_pool_size”, “realm”
birdlife_city_data <- fetch_city_data_for('birdlife')
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
name = col_character(),
response = col_double()
)
Joining, by = "name"
birdlife_city_data <- fetch_city_data_for('birdlife')
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
name = col_character(),
response = col_double()
)
Joining, by = "name"
birdlife_city_data
birdlife_city_data_fixed <- rfImpute(response ~ ., birdlife_city_data)
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 5.546 87.80 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 5.56 88.01 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 5.312 84.08 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 5.467 86.55 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 5.482 86.77 |
birdlife_city_data_fixed
select_variables_from_random_forest(birdlife_city_data_fixed)
[1] "population_growth" "birdlife_pool_size" "region_100km_cultivated"
[4] "biome_name" "region_20km_average_pop_density" "rainfall_monthly_min"
[7] "percentage_urban_area_as_open_public_spaces" "region_50km_cultivated" "permanent_water"
[10] "region_50km_average_pop_density" "rainfall_monthly_max" "mean_population_exposure_to_pm2_5_2019"
[13] "temperature_monthly_min" "shrubs" "temperature_annual_average"
[16] "region_100km_average_pop_density" "region_100km_urban" "region_20km_cultivated"
[19] "percentage_urban_area_as_open_public_spaces_and_streets" "region_20km_elevation_delta" "share_of_population_within_400m_of_open_space"
[22] "region_20km_urban" "city_average_pop_density" "happiness_future_life"
[25] "region_50km_elevation_delta" "region_50km_urban" "open_forest"
[28] "percentage_urban_area_as_streets" "temperature_monthly_max" "realm"
[31] "city_max_pop_density" "city_elevation_delta" "rainfall_annual_average"
[34] "city_gdp_per_population" "cultivated" "happiness_negative_effect"
[37] "region_100km_mean_elevation" "region_50km_mean_elevation" "city_mean_elevation"
[40] "closed_forest" "happiness_positive_effect" "herbaceous_wetland"
[43] "urban" "herbaceous_vegetation"
select_variables_from_random_forest(birdlife_city_data_fixed_single_scale)
[1] "population_growth" "birdlife_pool_size" "region_100km_cultivated"
[4] "percentage_urban_area_as_open_public_spaces" "biome_name" "rainfall_monthly_min"
[7] "region_20km_average_pop_density" "permanent_water" "rainfall_monthly_max"
[10] "temperature_annual_average" "temperature_monthly_min" "mean_population_exposure_to_pm2_5_2019"
[13] "region_100km_urban" "shrubs" "region_20km_elevation_delta"
[16] "percentage_urban_area_as_open_public_spaces_and_streets" "share_of_population_within_400m_of_open_space" "realm"
[19] "city_average_pop_density" "open_forest" "happiness_future_life"
[22] "city_elevation_delta" "temperature_monthly_max" "rainfall_annual_average"
[25] "percentage_urban_area_as_streets" "city_gdp_per_population" "cultivated"
[28] "happiness_negative_effect" "closed_forest" "region_50km_mean_elevation"
[31] "city_mean_elevation" "herbaceous_wetland" "urban"
[34] "herbaceous_vegetation"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth")])
[1] "Mean 6.35474438479356 , SD: 0.0712540254137255 , Mean + SD: 6.42599841020729"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "birdlife_pool_size")])
[1] "Mean 5.53431593845832 , SD: 0.0808938134022798 , Mean + SD: 5.6152097518606"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "birdlife_pool_size", "region_100km_cultivated")])
[1] "Mean 5.03645460544453 , SD: 0.080993921046237 , Mean + SD: 5.11744852649077"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "birdlife_pool_size", "region_100km_cultivated", "percentage_urban_area_as_open_public_spaces", "biome_name")])
[1] "Mean 5.01181482503518 , SD: 0.0829095109876754 , Mean + SD: 5.09472433602285"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "birdlife_pool_size", "region_100km_cultivated", "percentage_urban_area_as_open_public_spaces", "biome_name", "rainfall_monthly_min")])
[1] "Mean 4.97549471316348 , SD: 0.0685833826522881 , Mean + SD: 5.04407809581577"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "birdlife_pool_size", "region_100km_cultivated", "percentage_urban_area_as_open_public_spaces", "biome_name", "rainfall_monthly_min", "region_20km_average_pop_density")])
[1] "Mean 4.86486157758304 , SD: 0.0978544677155927 , Mean + SD: 4.96271604529863"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "birdlife_pool_size", "region_100km_cultivated", "percentage_urban_area_as_open_public_spaces", "biome_name", "rainfall_monthly_min", "region_20km_average_pop_density", "permanent_water")])
[1] "Mean 4.75503603367959 , SD: 0.0820218992111515 , Mean + SD: 4.83705793289075"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "birdlife_pool_size", "region_100km_cultivated", "percentage_urban_area_as_open_public_spaces", "biome_name", "rainfall_monthly_min", "region_20km_average_pop_density", "permanent_water", "rainfall_monthly_max")])
[1] "Mean 4.83291787856041 , SD: 0.0877474150947955 , Mean + SD: 4.9206652936552"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "birdlife_pool_size", "region_100km_cultivated", "percentage_urban_area_as_open_public_spaces", "biome_name", "rainfall_monthly_min", "region_20km_average_pop_density", "permanent_water", "rainfall_monthly_max", "temperature_annual_average")])
[1] "Mean 4.89459108748752 , SD: 0.0675800594452371 , Mean + SD: 4.96217114693276"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "birdlife_pool_size", "region_100km_cultivated", "percentage_urban_area_as_open_public_spaces", "biome_name", "rainfall_monthly_min", "region_20km_average_pop_density", "permanent_water", "rainfall_monthly_max", "temperature_annual_average", "temperature_monthly_min")])
[1] "Mean 4.87244265396031 , SD: 0.0892281563604926 , Mean + SD: 4.9616708103208"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "birdlife_pool_size", "region_100km_cultivated", "percentage_urban_area_as_open_public_spaces", "biome_name", "rainfall_monthly_min", "region_20km_average_pop_density", "permanent_water", "rainfall_monthly_max", "temperature_annual_average", "temperature_monthly_min", "mean_population_exposure_to_pm2_5_2019")])
[1] "Mean 4.83199994339927 , SD: 0.0822602681223139 , Mean + SD: 4.91426021152158"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "birdlife_pool_size", "region_100km_cultivated", "percentage_urban_area_as_open_public_spaces", "biome_name", "rainfall_monthly_min", "region_20km_average_pop_density", "permanent_water", "rainfall_monthly_max", "temperature_annual_average", "temperature_monthly_min", "mean_population_exposure_to_pm2_5_2019", "region_100km_urban")])
[1] "Mean 4.82180678041379 , SD: 0.0733987377154572 , Mean + SD: 4.89520551812925"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "birdlife_pool_size", "region_100km_cultivated", "percentage_urban_area_as_open_public_spaces", "biome_name", "rainfall_monthly_min", "region_20km_average_pop_density", "permanent_water", "rainfall_monthly_max", "temperature_annual_average", "temperature_monthly_min", "mean_population_exposure_to_pm2_5_2019", "region_100km_urban", "shrubs")])
[1] "Mean 4.88760637080854 , SD: 0.0708285469155194 , Mean + SD: 4.95843491772406"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "birdlife_pool_size", "region_100km_cultivated", "percentage_urban_area_as_open_public_spaces", "biome_name", "rainfall_monthly_min", "region_20km_average_pop_density", "permanent_water", "rainfall_monthly_max", "temperature_annual_average", "temperature_monthly_min", "mean_population_exposure_to_pm2_5_2019", "region_100km_urban", "shrubs", "region_20km_elevation_delta")])
[1] "Mean 4.92024268589129 , SD: 0.102386290093399 , Mean + SD: 5.02262897598469"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "birdlife_pool_size", "region_100km_cultivated", "percentage_urban_area_as_open_public_spaces", "biome_name", "rainfall_monthly_min", "region_20km_average_pop_density", "permanent_water", "rainfall_monthly_max", "temperature_annual_average", "temperature_monthly_min", "mean_population_exposure_to_pm2_5_2019", "region_100km_urban", "shrubs", "region_20km_elevation_delta", "percentage_urban_area_as_open_public_spaces_and_streets")])
[1] "Mean 4.91143919034019 , SD: 0.0837468464163517 , Mean + SD: 4.99518603675654"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "birdlife_pool_size", "region_100km_cultivated", "percentage_urban_area_as_open_public_spaces", "biome_name", "rainfall_monthly_min", "region_20km_average_pop_density", "permanent_water", "rainfall_monthly_max", "temperature_annual_average", "temperature_monthly_min", "mean_population_exposure_to_pm2_5_2019", "region_100km_urban", "shrubs", "region_20km_elevation_delta", "percentage_urban_area_as_open_public_spaces_and_streets", "share_of_population_within_400m_of_open_space")])
[1] "Mean 4.96416771734588 , SD: 0.0825668458432887 , Mean + SD: 5.04673456318917"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "birdlife_pool_size", "region_100km_cultivated", "percentage_urban_area_as_open_public_spaces", "biome_name", "rainfall_monthly_min", "region_20km_average_pop_density", "permanent_water", "rainfall_monthly_max", "temperature_annual_average", "temperature_monthly_min", "mean_population_exposure_to_pm2_5_2019", "region_100km_urban", "shrubs", "region_20km_elevation_delta", "percentage_urban_area_as_open_public_spaces_and_streets", "share_of_population_within_400m_of_open_space", "realm")])
[1] "Mean 4.98107377749941 , SD: 0.0814576329581648 , Mean + SD: 5.06253141045758"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "birdlife_pool_size", "region_100km_cultivated", "percentage_urban_area_as_open_public_spaces", "biome_name", "rainfall_monthly_min", "region_20km_average_pop_density", "permanent_water", "rainfall_monthly_max", "temperature_annual_average", "temperature_monthly_min", "mean_population_exposure_to_pm2_5_2019", "region_100km_urban", "shrubs", "region_20km_elevation_delta", "percentage_urban_area_as_open_public_spaces_and_streets", "share_of_population_within_400m_of_open_space", "realm", "city_average_pop_density")])
[1] "Mean 4.9987069406936 , SD: 0.0742949984686252 , Mean + SD: 5.07300193916222"
create_fifty_rows_of_oob(birdlife_city_data_fixed[,c("response", "population_growth", "birdlife_pool_size", "region_100km_cultivated", "percentage_urban_area_as_open_public_spaces", "biome_name", "rainfall_monthly_min", "region_20km_average_pop_density", "permanent_water", "rainfall_monthly_max", "temperature_annual_average", "temperature_monthly_min", "mean_population_exposure_to_pm2_5_2019", "region_100km_urban", "shrubs", "region_20km_elevation_delta", "percentage_urban_area_as_open_public_spaces_and_streets", "share_of_population_within_400m_of_open_space", "realm", "city_average_pop_density", "open_forest", "happiness_future_life")])
[1] "Mean 5.02149734174997 , SD: 0.0854499582879466 , Mean + SD: 5.10694730003792"
“population_growth”, “birdlife_pool_size”, “region_100km_cultivated”, “percentage_urban_area_as_open_public_spaces”, “biome_name”, “rainfall_monthly_min”, “region_20km_average_pop_density”, “permanent_water”
either_city_data <- fetch_city_data_for('either')
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
name = col_character(),
response = col_double()
)
Joining, by = "name"
either_city_data
either_city_data_fixed <- rfImpute(response ~ ., either_city_data)
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 4.824 94.87 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 4.613 90.71 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 4.655 91.54 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 4.605 90.56 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 4.581 90.08 |
either_city_data_fixed
select_variables_from_random_forest(either_city_data_fixed)
[1] "either_pool_size" "population_growth" "region_100km_cultivated"
[4] "region_20km_average_pop_density" "realm" "region_50km_cultivated"
[7] "region_50km_average_pop_density" "biome_name" "shrubs"
[10] "rainfall_monthly_min" "region_100km_average_pop_density" "permanent_water"
[13] "region_20km_cultivated" "temperature_monthly_min" "region_50km_elevation_delta"
[16] "region_20km_urban" "mean_population_exposure_to_pm2_5_2019" "region_20km_elevation_delta"
[19] "percentage_urban_area_as_open_public_spaces" "city_average_pop_density" "happiness_future_life"
[22] "rainfall_monthly_max" "temperature_annual_average" "temperature_monthly_max"
[25] "region_100km_urban" "region_50km_urban" "cultivated"
[28] "share_of_population_within_400m_of_open_space" "city_max_pop_density" "city_elevation_delta"
[31] "city_mean_elevation" "herbaceous_wetland" "rainfall_annual_average"
[34] "region_100km_elevation_delta" "city_gdp_per_population" "region_20km_mean_elevation"
[37] "percentage_urban_area_as_open_public_spaces_and_streets" "region_50km_mean_elevation" "region_100km_mean_elevation"
[40] "happiness_negative_effect" "open_forest" "urban"
[43] "happiness_positive_effect" "herbaceous_vegetation" "percentage_urban_area_as_streets"
[46] "closed_forest"
select_variables_from_random_forest(either_city_data_fixed_single_scale)
[1] "either_pool_size" "population_growth" "region_100km_cultivated"
[4] "region_20km_average_pop_density" "realm" "biome_name"
[7] "rainfall_monthly_min" "shrubs" "temperature_monthly_min"
[10] "permanent_water" "percentage_urban_area_as_open_public_spaces" "region_20km_urban"
[13] "region_50km_elevation_delta" "mean_population_exposure_to_pm2_5_2019" "city_average_pop_density"
[16] "rainfall_monthly_max" "happiness_future_life" "cultivated"
[19] "share_of_population_within_400m_of_open_space" "city_elevation_delta" "city_max_pop_density"
[22] "rainfall_annual_average" "city_mean_elevation" "percentage_urban_area_as_open_public_spaces_and_streets"
[25] "temperature_monthly_max" "region_20km_mean_elevation" "open_forest"
[28] "happiness_negative_effect" "urban" "percentage_urban_area_as_streets"
[31] "closed_forest"
create_fifty_rows_of_oob(either_city_data_fixed[,c("response", "either_pool_size")])
[1] "Mean 4.69133292981281 , SD: 0.0489285631824466 , Mean + SD: 4.74026149299525"
create_fifty_rows_of_oob(either_city_data_fixed[,c("response", "either_pool_size", "population_growth")])
[1] "Mean 4.18668186942246 , SD: 0.0652412455486544 , Mean + SD: 4.25192311497111"
create_fifty_rows_of_oob(either_city_data_fixed[,c("response", "either_pool_size", "population_growth", "region_100km_cultivated")])
[1] "Mean 4.08678736410676 , SD: 0.05782944992901 , Mean + SD: 4.14461681403577"
create_fifty_rows_of_oob(either_city_data_fixed[,c("response", "either_pool_size", "population_growth", "region_100km_cultivated", "region_20km_average_pop_density")])
[1] "Mean 3.7612122389608 , SD: 0.0626910363388465 , Mean + SD: 3.82390327529965"
create_fifty_rows_of_oob(either_city_data_fixed[,c("response", "either_pool_size", "population_growth", "region_100km_cultivated", "region_20km_average_pop_density", "realm")])
[1] "Mean 3.62757829197622 , SD: 0.0641414031735027 , Mean + SD: 3.69171969514973"
create_fifty_rows_of_oob(either_city_data_fixed[,c("response", "either_pool_size", "population_growth", "region_100km_cultivated", "region_20km_average_pop_density", "realm", "biome_name")])
[1] "Mean 3.88541888033384 , SD: 0.0697761912903623 , Mean + SD: 3.9551950716242"
create_fifty_rows_of_oob(either_city_data_fixed[,c("response", "either_pool_size", "population_growth", "region_100km_cultivated", "region_20km_average_pop_density", "realm", "biome_name", "rainfall_monthly_min")])
[1] "Mean 3.94442754211899 , SD: 0.0705357146629719 , Mean + SD: 4.01496325678196"
create_fifty_rows_of_oob(either_city_data_fixed[,c("response", "either_pool_size", "population_growth", "region_100km_cultivated", "region_20km_average_pop_density", "realm", "biome_name", "rainfall_monthly_min", "shrubs")])
[1] "Mean 3.94268869246952 , SD: 0.0683286721896993 , Mean + SD: 4.01101736465922"
create_fifty_rows_of_oob(either_city_data_fixed[,c("response", "either_pool_size", "population_growth", "region_100km_cultivated", "region_20km_average_pop_density", "realm", "biome_name", "rainfall_monthly_min", "shrubs", "temperature_monthly_min")])
[1] "Mean 3.91638012322931 , SD: 0.058327290718483 , Mean + SD: 3.97470741394779"
create_fifty_rows_of_oob(either_city_data_fixed[,c("response", "either_pool_size", "population_growth", "region_100km_cultivated", "region_20km_average_pop_density", "realm", "biome_name", "rainfall_monthly_min", "shrubs", "temperature_monthly_min", "permanent_water")])
[1] "Mean 3.97807710896339 , SD: 0.0819372375299245 , Mean + SD: 4.06001434649332"
create_fifty_rows_of_oob(either_city_data_fixed[,c("response", "either_pool_size", "population_growth", "region_100km_cultivated", "region_20km_average_pop_density", "realm", "biome_name", "rainfall_monthly_min", "shrubs", "temperature_monthly_min", "permanent_water", "percentage_urban_area_as_open_public_spaces")])
[1] "Mean 4.08096128871886 , SD: 0.0742590817487434 , Mean + SD: 4.15522037046761"
create_fifty_rows_of_oob(either_city_data_fixed[,c("response", "either_pool_size", "population_growth", "region_100km_cultivated", "region_20km_average_pop_density", "realm", "biome_name", "rainfall_monthly_min", "shrubs", "temperature_monthly_min", "permanent_water", "percentage_urban_area_as_open_public_spaces", "region_20km_urban")])
[1] "Mean 4.10276372102361 , SD: 0.0793013544355574 , Mean + SD: 4.18206507545916"
create_fifty_rows_of_oob(either_city_data_fixed[,c("response", "either_pool_size", "population_growth", "region_100km_cultivated", "region_20km_average_pop_density", "realm", "biome_name", "rainfall_monthly_min", "shrubs", "temperature_monthly_min", "permanent_water", "percentage_urban_area_as_open_public_spaces", "region_20km_urban", "region_50km_elevation_delta")])
[1] "Mean 4.14360043109383 , SD: 0.0571122481729091 , Mean + SD: 4.20071267926674"
create_fifty_rows_of_oob(either_city_data_fixed[,c("response", "either_pool_size", "population_growth", "region_100km_cultivated", "region_20km_average_pop_density", "realm", "biome_name", "rainfall_monthly_min", "shrubs", "temperature_monthly_min", "permanent_water", "percentage_urban_area_as_open_public_spaces", "region_20km_urban", "region_50km_elevation_delta", "mean_population_exposure_to_pm2_5_2019")])
[1] "Mean 4.17420426151807 , SD: 0.0800438347270345 , Mean + SD: 4.25424809624511"
create_fifty_rows_of_oob(either_city_data_fixed[,c("response", "either_pool_size", "population_growth", "region_100km_cultivated", "region_20km_average_pop_density", "realm", "biome_name", "rainfall_monthly_min", "shrubs", "temperature_monthly_min", "permanent_water", "percentage_urban_area_as_open_public_spaces", "region_20km_urban", "region_50km_elevation_delta", "mean_population_exposure_to_pm2_5_2019", "city_average_pop_density")])
[1] "Mean 4.20142417235852 , SD: 0.0534400086677398 , Mean + SD: 4.25486418102626"
create_fifty_rows_of_oob(either_city_data_fixed[,c("response", "either_pool_size", "population_growth", "region_100km_cultivated", "region_20km_average_pop_density", "realm", "biome_name", "rainfall_monthly_min", "shrubs", "temperature_monthly_min", "permanent_water", "percentage_urban_area_as_open_public_spaces", "region_20km_urban", "region_50km_elevation_delta", "mean_population_exposure_to_pm2_5_2019", "city_average_pop_density", "rainfall_monthly_max")])
[1] "Mean 4.23495161199434 , SD: 0.0810189022929709 , Mean + SD: 4.31597051428731"
create_fifty_rows_of_oob(either_city_data_fixed[,c("response", "either_pool_size", "population_growth", "region_100km_cultivated", "region_20km_average_pop_density", "realm", "biome_name", "rainfall_monthly_min", "shrubs", "temperature_monthly_min", "permanent_water", "percentage_urban_area_as_open_public_spaces", "region_20km_urban", "region_50km_elevation_delta", "mean_population_exposure_to_pm2_5_2019", "city_average_pop_density", "rainfall_monthly_max", "happiness_future_life")])
[1] "Mean 4.25511343099509 , SD: 0.0777504142418531 , Mean + SD: 4.33286384523695"
create_fifty_rows_of_oob(either_city_data_fixed[,c("response", "either_pool_size", "population_growth", "region_100km_cultivated", "region_20km_average_pop_density", "realm", "biome_name", "rainfall_monthly_min", "shrubs", "temperature_monthly_min", "permanent_water", "percentage_urban_area_as_open_public_spaces", "region_20km_urban", "region_50km_elevation_delta", "mean_population_exposure_to_pm2_5_2019", "city_average_pop_density", "rainfall_monthly_max", "happiness_future_life", "cultivated")])
[1] "Mean 4.27187737013208 , SD: 0.0659717347812921 , Mean + SD: 4.33784910491338"
create_fifty_rows_of_oob(either_city_data_fixed[,c("response", "either_pool_size", "population_growth", "region_100km_cultivated", "region_20km_average_pop_density", "realm", "biome_name", "rainfall_monthly_min", "shrubs", "temperature_monthly_min", "permanent_water", "percentage_urban_area_as_open_public_spaces", "region_20km_urban", "region_50km_elevation_delta", "mean_population_exposure_to_pm2_5_2019", "city_average_pop_density", "rainfall_monthly_max", "happiness_future_life", "cultivated", "share_of_population_within_400m_of_open_space")])
[1] "Mean 4.32785556380064 , SD: 0.0915334527831198 , Mean + SD: 4.41938901658376"
create_fifty_rows_of_oob(either_city_data_fixed[,c("response", "either_pool_size", "population_growth", "region_100km_cultivated", "region_20km_average_pop_density", "realm", "biome_name", "rainfall_monthly_min", "shrubs", "temperature_monthly_min", "permanent_water", "percentage_urban_area_as_open_public_spaces", "region_20km_urban", "region_50km_elevation_delta", "mean_population_exposure_to_pm2_5_2019", "city_average_pop_density", "rainfall_monthly_max", "happiness_future_life", "cultivated", "share_of_population_within_400m_of_open_space", "city_elevation_delta")])
[1] "Mean 4.33688765161498 , SD: 0.0687985208297185 , Mean + SD: 4.4056861724447"
create_fifty_rows_of_oob(either_city_data_fixed[,c("response", "either_pool_size", "population_growth", "region_100km_cultivated", "region_20km_average_pop_density", "realm", "biome_name", "rainfall_monthly_min", "shrubs", "temperature_monthly_min", "permanent_water", "percentage_urban_area_as_open_public_spaces", "region_20km_urban", "region_50km_elevation_delta", "mean_population_exposure_to_pm2_5_2019", "city_average_pop_density", "rainfall_monthly_max", "happiness_future_life", "cultivated", "share_of_population_within_400m_of_open_space", "city_elevation_delta", "city_max_pop_density")])
[1] "Mean 4.35933973615371 , SD: 0.0853454117772923 , Mean + SD: 4.444685147931"
“either_pool_size”, “population_growth”, “region_100km_cultivated”, “region_20km_average_pop_density”, “realm”
both_city_data <- fetch_city_data_for('both')
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
name = col_character(),
response = col_double()
)
Joining, by = "name"
both_city_data
both_city_data_fixed <- rfImpute(response ~ ., both_city_data)
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 16.32 99.56 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 15.46 94.36 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 15.88 96.87 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 15.65 95.52 |
| Out-of-bag |
Tree | MSE %Var(y) |
300 | 15.91 97.08 |
both_city_data_fixed
select_variables_from_random_forest(both_city_data_fixed)
[1] "both_pool_size" "temperature_annual_average" "temperature_monthly_min"
[4] "permanent_water" "happiness_negative_effect" "region_20km_urban"
[7] "region_100km_cultivated" "region_50km_cultivated" "realm"
[10] "region_20km_cultivated" "rainfall_monthly_min" "region_50km_elevation_delta"
[13] "population_growth" "shrubs" "region_100km_elevation_delta"
[16] "region_20km_average_pop_density" "region_100km_urban" "biome_name"
[19] "region_20km_elevation_delta" "region_50km_urban" "percentage_urban_area_as_open_public_spaces"
[22] "city_average_pop_density" "city_gdp_per_population" "region_50km_average_pop_density"
[25] "open_forest" "herbaceous_wetland" "cultivated"
[28] "region_100km_average_pop_density" "region_20km_mean_elevation" "share_of_population_within_400m_of_open_space"
[31] "mean_population_exposure_to_pm2_5_2019" "city_elevation_delta" "region_50km_mean_elevation"
[34] "happiness_future_life" "happiness_positive_effect" "rainfall_monthly_max"
[37] "percentage_urban_area_as_open_public_spaces_and_streets" "herbaceous_vegetation" "temperature_monthly_max"
[40] "percentage_urban_area_as_streets" "rainfall_annual_average" "urban"
[43] "closed_forest"
select_variables_from_random_forest(both_city_data_fixed_single_scale)
[1] "both_pool_size" "temperature_annual_average" "temperature_monthly_min"
[4] "permanent_water" "happiness_negative_effect" "region_20km_urban"
[7] "rainfall_monthly_min" "realm" "region_100km_cultivated"
[10] "region_50km_elevation_delta" "population_growth" "percentage_urban_area_as_open_public_spaces"
[13] "shrubs" "biome_name" "region_20km_average_pop_density"
[16] "city_mean_elevation" "city_gdp_per_population" "share_of_population_within_400m_of_open_space"
[19] "cultivated" "open_forest" "region_20km_mean_elevation"
[22] "rainfall_monthly_max" "temperature_monthly_max" "rainfall_annual_average"
[25] "percentage_urban_area_as_streets" "closed_forest" "urban"
create_fifty_rows_of_oob(both_city_data_fixed[,c("response", "both_pool_size")])
[1] "Mean 17.0501746482574 , SD: 0.180832956779973 , Mean + SD: 17.2310076050374"
create_fifty_rows_of_oob(both_city_data_fixed[,c("response", "both_pool_size", "temperature_annual_average")])
[1] "Mean 14.1770117377572 , SD: 0.12741540946317 , Mean + SD: 14.3044271472204"
create_fifty_rows_of_oob(both_city_data_fixed[,c("response", "both_pool_size", "temperature_annual_average", "temperature_monthly_min")])
[1] "Mean 13.9947226485825 , SD: 0.193418308082352 , Mean + SD: 14.1881409566648"
create_fifty_rows_of_oob(both_city_data_fixed[,c("response", "both_pool_size", "temperature_annual_average", "temperature_monthly_min", "permanent_water")])
[1] "Mean 13.980419606819 , SD: 0.198277385993035 , Mean + SD: 14.1786969928121"
create_fifty_rows_of_oob(both_city_data_fixed[,c("response", "both_pool_size", "temperature_annual_average", "temperature_monthly_min", "permanent_water", "happiness_negative_effect")])
[1] "Mean 14.250869948595 , SD: 0.205833621996072 , Mean + SD: 14.4567035705911"
create_fifty_rows_of_oob(both_city_data_fixed[,c("response", "both_pool_size", "temperature_annual_average", "temperature_monthly_min", "permanent_water", "happiness_negative_effect", "region_20km_urban")])
[1] "Mean 13.831253031623 , SD: 0.267164827147086 , Mean + SD: 14.0984178587701"
create_fifty_rows_of_oob(both_city_data_fixed[,c("response", "both_pool_size", "temperature_annual_average", "temperature_monthly_min", "permanent_water", "happiness_negative_effect", "region_20km_urban", "rainfall_monthly_min")])
[1] "Mean 14.0176076470363 , SD: 0.233933915226803 , Mean + SD: 14.2515415622631"
create_fifty_rows_of_oob(both_city_data_fixed[,c("response", "both_pool_size", "temperature_annual_average", "temperature_monthly_min", "permanent_water", "happiness_negative_effect", "region_20km_urban", "rainfall_monthly_min", "realm")])
[1] "Mean 13.9678429656754 , SD: 0.27500268619645 , Mean + SD: 14.2428456518719"
create_fifty_rows_of_oob(both_city_data_fixed[,c("response", "both_pool_size", "temperature_annual_average", "temperature_monthly_min", "permanent_water", "happiness_negative_effect", "region_20km_urban", "rainfall_monthly_min", "realm", "region_100km_cultivated"),])
[1] "Mean 13.8133557218721 , SD: 0.208047207294211 , Mean + SD: 14.0214029291663"
create_fifty_rows_of_oob(both_city_data_fixed[,c("response", "both_pool_size", "temperature_annual_average", "temperature_monthly_min", "permanent_water", "happiness_negative_effect", "region_20km_urban", "rainfall_monthly_min", "realm", "region_100km_cultivated", "region_50km_elevation_delta"),])
[1] "Mean 14.1587620560898 , SD: 0.265407381711604 , Mean + SD: 14.4241694378014"
create_fifty_rows_of_oob(both_city_data_fixed[,c("response", "both_pool_size", "temperature_annual_average", "temperature_monthly_min", "permanent_water", "happiness_negative_effect", "region_20km_urban", "rainfall_monthly_min", "realm", "region_100km_cultivated", "region_50km_elevation_delta", "population_growth"),])
[1] "Mean 14.3695231260649 , SD: 0.277971868419252 , Mean + SD: 14.6474949944841"
create_fifty_rows_of_oob(both_city_data_fixed[,c("response", "both_pool_size", "temperature_annual_average", "temperature_monthly_min", "permanent_water", "happiness_negative_effect", "region_20km_urban", "rainfall_monthly_min", "realm", "region_100km_cultivated", "region_50km_elevation_delta", "population_growth", "percentage_urban_area_as_open_public_spaces"),])
[1] "Mean 14.6989193629512 , SD: 0.281371086172517 , Mean + SD: 14.9802904491237"
create_fifty_rows_of_oob(both_city_data_fixed[,c("response", "both_pool_size", "temperature_annual_average", "temperature_monthly_min", "permanent_water", "happiness_negative_effect", "region_20km_urban", "rainfall_monthly_min", "realm", "region_100km_cultivated", "region_50km_elevation_delta", "population_growth", "percentage_urban_area_as_open_public_spaces", "shrubs"),])
[1] "Mean 14.6255340760297 , SD: 0.287689760117654 , Mean + SD: 14.9132238361473"
create_fifty_rows_of_oob(both_city_data_fixed[,c("response", "both_pool_size", "temperature_annual_average", "temperature_monthly_min", "permanent_water", "happiness_negative_effect", "region_20km_urban", "rainfall_monthly_min", "realm", "region_100km_cultivated", "region_50km_elevation_delta", "population_growth", "percentage_urban_area_as_open_public_spaces", "shrubs", "biome_name"),])
[1] "Mean 14.6561287164134 , SD: 0.265987324658439 , Mean + SD: 14.9221160410718"
create_fifty_rows_of_oob(both_city_data_fixed[,c("response", "both_pool_size", "temperature_annual_average", "temperature_monthly_min", "permanent_water", "happiness_negative_effect", "region_20km_urban", "rainfall_monthly_min", "realm", "region_100km_cultivated", "region_50km_elevation_delta", "population_growth", "percentage_urban_area_as_open_public_spaces", "shrubs", "biome_name", "region_20km_average_pop_density"),])
[1] "Mean 14.7323907720158 , SD: 0.194408848363496 , Mean + SD: 14.9267996203793"
create_fifty_rows_of_oob(both_city_data_fixed[,c("response", "both_pool_size", "temperature_annual_average", "temperature_monthly_min", "permanent_water", "happiness_negative_effect", "region_20km_urban", "rainfall_monthly_min", "realm", "region_100km_cultivated", "region_50km_elevation_delta", "population_growth", "percentage_urban_area_as_open_public_spaces", "shrubs", "biome_name", "region_20km_average_pop_density", "city_mean_elevation"),])
[1] "Mean 14.7915916869301 , SD: 0.298364380477512 , Mean + SD: 15.0899560674076"
create_fifty_rows_of_oob(both_city_data_fixed[,c("response", "both_pool_size", "temperature_annual_average", "temperature_monthly_min", "permanent_water", "happiness_negative_effect", "region_20km_urban", "rainfall_monthly_min", "realm", "region_100km_cultivated", "region_50km_elevation_delta", "population_growth", "percentage_urban_area_as_open_public_spaces", "shrubs", "biome_name", "region_20km_average_pop_density", "city_mean_elevation", "city_gdp_per_population"),])
[1] "Mean 14.9134394356877 , SD: 0.224050531561373 , Mean + SD: 15.137489967249"
create_fifty_rows_of_oob(both_city_data_fixed[,c("response", "both_pool_size", "temperature_annual_average", "temperature_monthly_min", "permanent_water", "happiness_negative_effect", "region_20km_urban", "rainfall_monthly_min", "realm", "region_100km_cultivated", "region_50km_elevation_delta", "population_growth", "percentage_urban_area_as_open_public_spaces", "shrubs", "biome_name", "region_20km_average_pop_density", "city_mean_elevation", "city_gdp_per_population", "share_of_population_within_400m_of_open_space"),])
[1] "Mean 15.0099497484692 , SD: 0.2218461976947 , Mean + SD: 15.2317959461639"
“both_pool_size”, “temperature_annual_average”, “happiness_negative_effect”
| So…. |
|---|
| “merlin_pool_size”, “realm” “population_growth”, “birdlife_pool_size”, “region_100km_cultivated”, “percentage_urban_area_as_open_public_spaces”, “biome_name”, “rainfall_monthly_min”, “region_20km_average_pop_density”, “permanent_water” “either_pool_size”, “population_growth”, “region_100km_cultivated”, “region_20km_average_pop_density”, “realm” “both_pool_size”, “temperature_annual_average”, “temperature_monthly_min” |
| ```r summary(lm(response ~ merlin_pool_size, merlin_city_data_fixed)) |
| ``` |
| ``` |
| Call: lm(formula = response ~ merlin_pool_size, data = merlin_city_data_fixed) |
| Residuals: Min 1Q Median 3Q Max -8.3644 -2.2493 -0.3649 1.7804 15.4604 |
| Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 6.205975 0.920945 6.739 4.23e-10 merlin_pool_size -0.022439 0.003134 -7.160 4.71e-11 |
Signif. codes: 0 ‘’ 0.001 ‘’ 0.01 ‘’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 3.641 on 135 degrees of freedom Multiple R-squared: 0.2752, Adjusted R-squared: 0.2698 F-statistic: 51.26 on 1 and 135 DF, p-value: 4.707e-11
<!-- rnb-output-end -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuc3VtbWFyeShsbShyZXNwb25zZSB+IGJpcmRsaWZlX3Bvb2xfc2l6ZSwgYmlyZGxpZmVfY2l0eV9kYXRhX2ZpeGVkKSlcbmBgYCJ9 -->
```r
summary(lm(response ~ birdlife_pool_size, birdlife_city_data_fixed))
Call:
lm(formula = response ~ birdlife_pool_size, data = birdlife_city_data_fixed)
Residuals:
Min 1Q Median 3Q Max
-5.140 -1.330 -0.313 1.034 9.156
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.602931 0.625873 4.159 5.65e-05 ***
birdlife_pool_size -0.008789 0.002000 -4.395 2.23e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.368 on 135 degrees of freedom
Multiple R-squared: 0.1252, Adjusted R-squared: 0.1187
F-statistic: 19.31 on 1 and 135 DF, p-value: 2.225e-05
summary(lm(response ~ either_pool_size, either_city_data_fixed))
Call:
lm(formula = response ~ either_pool_size, data = either_city_data_fixed)
Residuals:
Min 1Q Median 3Q Max
-4.8488 -1.0658 -0.3811 0.8665 6.5921
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.250304 0.584389 5.562 1.38e-07 ***
either_pool_size -0.009005 0.001546 -5.825 3.99e-08 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.031 on 135 degrees of freedom
Multiple R-squared: 0.2008, Adjusted R-squared: 0.1949
F-statistic: 33.92 on 1 and 135 DF, p-value: 3.99e-08
summary(lm(response ~ both_pool_size, both_city_data_fixed))
Call:
lm(formula = response ~ both_pool_size, data = both_city_data_fixed)
Residuals:
Min 1Q Median 3Q Max
-8.9674 -2.7370 -0.3475 1.8439 10.3398
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.261657 0.982371 5.356 3.56e-07 ***
both_pool_size -0.024842 0.004396 -5.651 9.08e-08 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 3.667 on 135 degrees of freedom
Multiple R-squared: 0.1913, Adjusted R-squared: 0.1853
F-statistic: 31.94 on 1 and 135 DF, p-value: 9.076e-08
summary(lm(response ~ region_100km_cultivated, merlin_city_data_fixed))
Call:
lm(formula = response ~ region_100km_cultivated, data = merlin_city_data_fixed)
Residuals:
Min 1Q Median 3Q Max
-8.7405 -2.8276 -0.5911 1.5098 18.0590
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.6281 0.5172 -1.214 0.2267
region_100km_cultivated 2.3444 1.3805 1.698 0.0918 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 4.232 on 135 degrees of freedom
Multiple R-squared: 0.02092, Adjusted R-squared: 0.01366
F-statistic: 2.884 on 1 and 135 DF, p-value: 0.09176
summary(lm(response ~ region_100km_cultivated, birdlife_city_data_fixed))
Call:
lm(formula = response ~ region_100km_cultivated, data = birdlife_city_data_fixed)
Residuals:
Min 1Q Median 3Q Max
-4.4506 -1.5884 -0.3702 1.3865 9.9581
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.6226 0.3002 -2.074 0.04001 *
region_100km_cultivated 2.3237 0.8013 2.900 0.00436 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.457 on 135 degrees of freedom
Multiple R-squared: 0.05864, Adjusted R-squared: 0.05167
F-statistic: 8.409 on 1 and 135 DF, p-value: 0.004359
summary(lm(response ~ region_100km_cultivated, either_city_data_fixed))
Call:
lm(formula = response ~ region_100km_cultivated, data = either_city_data_fixed)
Residuals:
Min 1Q Median 3Q Max
-4.6522 -1.4255 -0.2114 0.9771 6.3724
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.5459 0.2698 -2.024 0.04499 *
region_100km_cultivated 2.0373 0.7200 2.830 0.00537 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.207 on 135 degrees of freedom
Multiple R-squared: 0.05599, Adjusted R-squared: 0.049
F-statistic: 8.008 on 1 and 135 DF, p-value: 0.00537
summary(lm(response ~ region_100km_cultivated, both_city_data_fixed))
Call:
lm(formula = response ~ region_100km_cultivated, data = both_city_data_fixed)
Residuals:
Min 1Q Median 3Q Max
-8.439 -2.791 -0.689 1.898 12.088
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.7221 0.4908 -1.471 0.1436
region_100km_cultivated 2.6951 1.3099 2.057 0.0416 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 4.016 on 135 degrees of freedom
Multiple R-squared: 0.0304, Adjusted R-squared: 0.02322
F-statistic: 4.233 on 1 and 135 DF, p-value: 0.04157
ggplot() +
geom_point(aes(x = merlin_pool_size, y = region_100km_cultivated), merlin_city_data_fixed, color = "red") +
geom_point(aes(x = birdlife_pool_size, y = region_100km_cultivated), birdlife_city_data_fixed, color = "blue") +
geom_point(aes(x = either_pool_size, y = region_100km_cultivated), either_city_data_fixed, color = "green") +
geom_point(aes(x = both_pool_size, y = region_100km_cultivated), both_city_data_fixed, color = "purple")
ggplot() +
geom_point(aes(x = population_growth, y = response), merlin_city_data_fixed, color = "red") +
geom_point(aes(x = population_growth, y = response), birdlife_city_data_fixed, color = "blue") +
geom_point(aes(x = population_growth, y = response), either_city_data_fixed, color = "green") +
geom_point(aes(x = population_growth, y = response), both_city_data_fixed, color = "purple")
ggplot() +
geom_point(aes(x = merlin_pool_size, y = region_20km_average_pop_density), merlin_city_data_fixed, color = "red") +
geom_point(aes(x = birdlife_pool_size, y = region_20km_average_pop_density), birdlife_city_data_fixed, color = "blue") +
geom_point(aes(x = either_pool_size, y = region_20km_average_pop_density), either_city_data_fixed, color = "green") +
geom_point(aes(x = both_pool_size, y = region_20km_average_pop_density), both_city_data_fixed, color = "purple")
summary(lm(response ~ population_growth, merlin_city_data_fixed))
Call:
lm(formula = response ~ population_growth, data = merlin_city_data_fixed)
Residuals:
Min 1Q Median 3Q Max
-9.2751 -2.8391 -0.4272 1.4837 18.4058
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.094091 0.524516 0.179 0.858
population_growth -0.001479 0.005915 -0.250 0.803
Residual standard error: 4.276 on 135 degrees of freedom
Multiple R-squared: 0.0004627, Adjusted R-squared: -0.006941
F-statistic: 0.0625 on 1 and 135 DF, p-value: 0.803
summary(lm(response ~ population_growth, birdlife_city_data_fixed))
Call:
lm(formula = response ~ population_growth, data = birdlife_city_data_fixed)
Residuals:
Min 1Q Median 3Q Max
-5.085 -1.538 -0.459 1.240 10.226
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.231365 0.309332 0.748 0.456
population_growth -0.003636 0.003489 -1.042 0.299
Residual standard error: 2.522 on 135 degrees of freedom
Multiple R-squared: 0.007984, Adjusted R-squared: 0.0006359
F-statistic: 1.087 on 1 and 135 DF, p-value: 0.2991
summary(lm(response ~ population_growth, either_city_data_fixed))
Call:
lm(formula = response ~ population_growth, data = either_city_data_fixed)
Residuals:
Min 1Q Median 3Q Max
-5.1409 -1.3284 -0.1829 0.8324 6.7919
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.113195 0.278318 0.407 0.685
population_growth -0.001779 0.003139 -0.567 0.572
Residual standard error: 2.269 on 135 degrees of freedom
Multiple R-squared: 0.002374, Adjusted R-squared: -0.005016
F-statistic: 0.3213 on 1 and 135 DF, p-value: 0.5718
summary(lm(response ~ population_growth, both_city_data_fixed))
Call:
lm(formula = response ~ population_growth, data = both_city_data_fixed)
Residuals:
Min 1Q Median 3Q Max
-9.1143 -2.5568 -0.7818 2.1289 12.4621
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.188410 0.499736 0.377 0.707
population_growth -0.002961 0.005636 -0.525 0.600
Residual standard error: 4.074 on 135 degrees of freedom
Multiple R-squared: 0.002041, Adjusted R-squared: -0.005351
F-statistic: 0.2761 on 1 and 135 DF, p-value: 0.6001
summary(lm(response ~ rainfall_monthly_min, merlin_city_data_fixed))
Call:
lm(formula = response ~ rainfall_monthly_min, data = merlin_city_data_fixed)
Residuals:
Min 1Q Median 3Q Max
-9.2835 -2.9452 -0.4893 1.4983 18.2505
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.191459 0.491332 0.390 0.697
rainfall_monthly_min -0.007481 0.012853 -0.582 0.562
Residual standard error: 4.272 on 135 degrees of freedom
Multiple R-squared: 0.002503, Adjusted R-squared: -0.004886
F-statistic: 0.3387 on 1 and 135 DF, p-value: 0.5615
summary(lm(response ~ rainfall_monthly_min, birdlife_city_data_fixed))
Call:
lm(formula = response ~ rainfall_monthly_min, data = birdlife_city_data_fixed)
Residuals:
Min 1Q Median 3Q Max
-5.0114 -1.4084 -0.4231 1.3632 10.6767
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.244199 0.289526 0.843 0.40
rainfall_monthly_min -0.009541 0.007574 -1.260 0.21
Residual standard error: 2.517 on 135 degrees of freedom
Multiple R-squared: 0.01162, Adjusted R-squared: 0.004298
F-statistic: 1.587 on 1 and 135 DF, p-value: 0.2099
summary(lm(response ~ rainfall_monthly_min, either_city_data_fixed))
Call:
lm(formula = response ~ rainfall_monthly_min, data = either_city_data_fixed)
Residuals:
Min 1Q Median 3Q Max
-5.1121 -1.3720 -0.2964 0.8111 6.5298
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.219743 0.259756 0.846 0.399
rainfall_monthly_min -0.008586 0.006795 -1.264 0.209
Residual standard error: 2.258 on 135 degrees of freedom
Multiple R-squared: 0.01169, Adjusted R-squared: 0.004367
F-statistic: 1.597 on 1 and 135 DF, p-value: 0.2086
summary(lm(response ~ rainfall_monthly_min, both_city_data_fixed))
Call:
lm(formula = response ~ rainfall_monthly_min, data = both_city_data_fixed)
Residuals:
Min 1Q Median 3Q Max
-9.0991 -2.8506 -0.8491 1.9009 12.2257
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.30602 0.46742 0.655 0.514
rainfall_monthly_min -0.01196 0.01223 -0.978 0.330
Residual standard error: 4.064 on 135 degrees of freedom
Multiple R-squared: 0.007033, Adjusted R-squared: -0.0003223
F-statistic: 0.9562 on 1 and 135 DF, p-value: 0.3299
ggplot() +
geom_point(aes(x = rainfall_monthly_min, y = response), merlin_city_data_fixed, color = "red") +
geom_point(aes(x = rainfall_monthly_min, y = response), birdlife_city_data_fixed, color = "blue") +
geom_point(aes(x = rainfall_monthly_min, y = response), either_city_data_fixed, color = "green") +
geom_point(aes(x = rainfall_monthly_min, y = response), both_city_data_fixed, color = "purple")
ggplot() +
geom_point(aes(x = temperature_annual_average, y = response), merlin_city_data_fixed, color = "red") +
geom_point(aes(x = temperature_annual_average, y = response), birdlife_city_data_fixed, color = "blue") +
geom_point(aes(x = temperature_annual_average, y = response), either_city_data_fixed, color = "green") +
geom_point(aes(x = temperature_annual_average, y = response), both_city_data_fixed, color = "purple")
ggplot() +
geom_point(aes(x = percentage_urban_area_as_open_public_spaces, y = response), merlin_city_data_fixed, color = "red") +
geom_point(aes(x = percentage_urban_area_as_open_public_spaces, y = response), birdlife_city_data_fixed, color = "blue") +
geom_point(aes(x = percentage_urban_area_as_open_public_spaces, y = response), either_city_data_fixed, color = "green") +
geom_point(aes(x = percentage_urban_area_as_open_public_spaces, y = response), both_city_data_fixed, color = "purple")
ggplot() +
geom_point(aes(x = happiness_negative_effect, y = response), merlin_city_data_fixed, color = "red") +
geom_point(aes(x = happiness_negative_effect, y = response), birdlife_city_data_fixed, color = "blue") +
geom_point(aes(x = happiness_negative_effect, y = response), either_city_data_fixed, color = "green") +
geom_point(aes(x = happiness_negative_effect, y = response), both_city_data_fixed, color = "purple")
ggplot() +
geom_boxplot(aes(x = response, y = biome_name), merlin_city_data_fixed)
ggplot() +
geom_boxplot(aes(x = response, y = biome_name), birdlife_city_data_fixed)
ggplot() +
geom_boxplot(aes(x = response, y = biome_name), either_city_data_fixed)
ggplot() +
geom_boxplot(aes(x = response, y = biome_name), both_city_data_fixed)
summary(lm(response ~ biome_name, merlin_city_data_fixed))
Call:
lm(formula = response ~ biome_name, data = merlin_city_data_fixed)
Residuals:
Min 1Q Median 3Q Max
-8.7663 -2.4594 -0.4676 2.1272 18.4309
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.2599 4.2666 -0.764 0.4463
biome_nameDeserts & Xeric Shrublands 3.1836 4.4563 0.714 0.4763
biome_nameFlooded Grasslands & Savannas 0.6618 5.2255 0.127 0.8994
biome_nameMangroves 9.3150 5.2255 1.783 0.0771 .
biome_nameMediterranean Forests, Woodlands & Scrub 3.2643 4.4066 0.741 0.4602
biome_nameMontane Grasslands & Shrublands 1.5344 5.2255 0.294 0.7695
biome_nameTemperate Broadleaf & Mixed Forests 3.2942 4.3328 0.760 0.4485
biome_nameTemperate Conifer Forests 3.3572 5.2255 0.642 0.5218
biome_nameTemperate Grasslands, Savannas & Shrublands 4.3835 4.6739 0.938 0.3501
biome_nameTropical & Subtropical Coniferous Forests 7.4846 5.2255 1.432 0.1546
biome_nameTropical & Subtropical Dry Broadleaf Forests 3.7631 4.4164 0.852 0.3958
biome_nameTropical & Subtropical Grasslands, Savannas & Shrublands 5.9138 4.6739 1.265 0.2081
biome_nameTropical & Subtropical Moist Broadleaf Forests 2.4622 4.3148 0.571 0.5693
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 4.267 on 124 degrees of freedom
Multiple R-squared: 0.08597, Adjusted R-squared: -0.002489
F-statistic: 0.9719 on 12 and 124 DF, p-value: 0.4793
| In Summary |
Response is related to number of species in regional pool, the more species, the less the percentage of species in the city. Indicating a fixed number of species are able to move into cities. The size of the regional pool is correlated with both the amount of urban and cultivated land cover, both reduce species in the regional pool.
Response is also lower in wet biomes and areas of the world, this is seen through the higher rainfall in the month with least rainfall in the year, and lower percentages in wetter biomes such as flooded grasslands and moist broadleaf forests.
Finally cities with a higher proportion of green public space are less likely to have a low response.
merlin_city_data_2 <- fetch_city_data_for('merlin', T)
birdlife_city_data_2 <- fetch_city_data_for('birdlife', T)
merlin_city_data$residuals_pool_size
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
3.6346589 -1.3808445 1.0649349 -3.4355790 -4.3126647 -0.7438113 -7.7336701 -2.4387017 1.4453341 -1.0023679 -3.6997976 -1.3953352 7.7497245 -0.8622274 0.5366795 4.5265798 1.0617137
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
-0.2523214 4.7814815 -3.1223646 1.7091096 -3.6108085 -0.1397141 15.4603887 -0.6266225 -3.0909248 1.0182028 1.7201615 -2.7031265 8.5064874 8.8776781 7.0569196 0.7062920 -1.2592215
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
1.2336978 0.1383472 7.0252049 2.2268554 -2.0718664 1.9814449 3.8241566 7.2649549 6.1520103 3.4917821 -0.1258741 1.8033208 0.4220553 -2.0346183 -3.6731730 -0.6081145 -4.7650229
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
-0.6374446 5.3381301 -0.5829402 -5.3003161 -1.6870541 3.8366707 4.8978947 0.2824830 -1.4631580 -1.6682595 -3.0684045 1.8602911 2.1229221 -3.4328909 0.7519428 -2.0768581 -1.3890451
69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
-5.0351403 -2.1185232 -0.7391181 2.2308499 0.1957247 1.8741271 -3.0258162 -3.2283923 -5.4035769 -2.1190007 -5.5415234 -1.3942752 2.2271298 2.4763802 0.9951147 7.2106299 -3.6705115
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
-4.0562506 -2.6050825 2.2253612 -0.6344070 -1.7984823 -1.3325552 -2.2520966 6.5027968 -0.4457886 1.0550630 -3.3616225 -4.5509836 0.4535341 0.5300748 -0.9801669 2.9973939 1.3750704
103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
1.5953003 -4.9345018 -2.4455672 7.0465516 -1.7276856 -0.3215003 -0.4133246 2.9320579 -2.2492757 -3.4825074 -5.7859859 -1.4772208 1.8647427 -5.5206398 1.8961396 0.3861443 -2.6258278
120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136
-1.2608866 7.3346754 -0.3648861 -0.9025271 -8.3644404 -0.2970551 -1.8090516 -4.4254561 0.5833549 -4.5589025 1.1898920 -0.1771279 0.4107215 -0.2915643 1.0268999 1.7803945 -0.9484046
137
0.1661597
merlin_by_all.lm <- lm(response ~ birdlife_pool_size + realm + population_growth + region_100km_cultivated + percentage_urban_area_as_open_public_spaces + biome_name + rainfall_monthly_min + region_20km_average_pop_density+ permanent_water + population_growth + temperature_annual_average + temperature_monthly_min, data = merlin_city_data_fixed)
Error in eval(predvars, data, env) :
object 'birdlife_pool_size' not found
merlin_by_preferred.lm <- lm(response ~ merlin_pool_size + biome_name + rainfall_monthly_min, data = merlin_city_data_fixed)
merlin_city_data$residuals_preferred <- resid(merlin_by_preferred.lm)
birdlife_by_preferred.lm <- lm(response ~ birdlife_pool_size + biome_name + rainfall_monthly_min, data = birdlife_city_data_fixed)
birdlife_city_data$residuals_preferred <- resid(birdlife_by_preferred.lm)
write_csv(ordered_cities, "city_effect_residuals.csv")
merlin_city_data$name <- merlin_city_data_2$name
plot_merlin_poolsize <- ggplot(merlin_city_data, aes(y = response, x = merlin_pool_size)) +
geom_smooth(method = "lm", se = F) +
geom_point(aes(color = residuals_pool_size), size = 4) +
geom_label_repel(aes(label = name), size = 4) +
xlab("Pool Size") + ylab("City Random Effect Response") +
guides(color=guide_legend(title="Model residuals 'response ~ pool_size'")) +
theme_bw() + theme(legend.position="bottom", legend.title=element_text(size=9), legend.text=element_text(size=8), legend.key.size = unit(1,"line")) +
labs(title = "Merlin response given pool size")
plot_merlin_poolsize
`geom_smooth()` using formula 'y ~ x'
Warning: ggrepel: 123 unlabeled data points (too many overlaps). Consider increasing max.overlaps
birdlife_city_data$name <- birdlife_city_data_2$name
plot_birdlife_poolsize <- ggplot(birdlife_city_data, aes(y = response, x = birdlife_pool_size)) +
geom_smooth(method = "lm", se = F) +
geom_point(aes(color = residuals_pool_size), size = 4) +
geom_label_repel(aes(label = name), size = 4) +
xlab("Pool Size") + ylab("City Random Effect Response") +
guides(color=guide_legend(title="Model residuals 'response ~ pool_size'")) +
theme_bw() + theme(legend.position="bottom", legend.title=element_text(size=9), legend.text=element_text(size=8), legend.key.size = unit(1,"line")) +
labs(title = "Birdlife response given pool size")
plot_birdlife_poolsize
`geom_smooth()` using formula 'y ~ x'
Warning: ggrepel: 114 unlabeled data points (too many overlaps). Consider increasing max.overlaps
plot_merlin_preferred <- ggplot(merlin_city_data, aes(y = response, x = merlin_pool_size)) +
geom_smooth(method = "lm", se = F) +
geom_point(aes(color = residuals_preferred), size = 4) +
geom_label_repel(aes(label = name), size = 4) +
xlab("Pool Size") + ylab("City Random Effect Response") +
guides(color=guide_legend(title="Model residuals 'response ~ pool_size + biome + rainfall_min'")) +
theme_bw() + theme(legend.position="bottom", legend.title=element_text(size=9), legend.text=element_text(size=8), legend.key.size = unit(1,"line")) +
labs(title = "Merlin response given pool size, biome, and rainfall")
plot_merlin_preferred
`geom_smooth()` using formula 'y ~ x'
Warning: ggrepel: 123 unlabeled data points (too many overlaps). Consider increasing max.overlaps
plot_birdlife_preferred <- ggplot(birdlife_city_data, aes(y = response, x = birdlife_pool_size)) +
geom_smooth(method = "lm", se = F) +
geom_point(aes(color = residuals_preferred), size = 4) +
geom_label_repel(aes(label = name), size = 4) +
xlab("Pool Size") + ylab("City Random Effect Response") +
guides(color=guide_legend(title="Model residuals 'response ~ pool_size + biome + rainfall_min'")) +
theme_bw() + theme(legend.position="bottom", legend.title=element_text(size=9), legend.text=element_text(size=8), legend.key.size = unit(1,"line")) +
labs(title = "Birdlife response given pool size, biome, and rainfall")
plot_birdlife_preferred
`geom_smooth()` using formula 'y ~ x'
Warning: ggrepel: 114 unlabeled data points (too many overlaps). Consider increasing max.overlaps
library(ggpubr)
plot_residuals <- ggarrange(plot_merlin_poolsize, plot_birdlife_poolsize, plot_merlin_preferred, plot_birdlife_preferred)
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
plot_residuals
Warning: ggrepel: 134 unlabeled data points (too many overlaps). Consider increasing max.overlaps
Warning: ggrepel: 136 unlabeled data points (too many overlaps). Consider increasing max.overlaps
Warning: ggrepel: 134 unlabeled data points (too many overlaps). Consider increasing max.overlaps
Warning: ggrepel: 136 unlabeled data points (too many overlaps). Consider increasing max.overlaps
jpeg("city_effect_residuals.jpg", width = 1600, height = 1200)
plot_residuals
Warning: ggrepel: 83 unlabeled data points (too many overlaps). Consider increasing max.overlaps
Warning: ggrepel: 69 unlabeled data points (too many overlaps). Consider increasing max.overlaps
Warning: ggrepel: 83 unlabeled data points (too many overlaps). Consider increasing max.overlaps
Warning: ggrepel: 69 unlabeled data points (too many overlaps). Consider increasing max.overlaps
dev.off()
null device
1
merlin_city_data_200 <- merlin_city_data[merlin_city_data$merlin_pool_size > 190 & merlin_city_data$merlin_pool_size < 210,]
merlin_city_data_200[order(merlin_city_data_200$response), c("name", "response")]
birdlife_city_data_200 <- birdlife_city_data[birdlife_city_data$birdlife_pool_size > 190 & birdlife_city_data$birdlife_pool_size < 210,]
birdlife_city_data_200[order(birdlife_city_data_200$response), c("name", "response")]
birdlife_city_data_300 <- birdlife_city_data[birdlife_city_data$birdlife_pool_size > 290 & birdlife_city_data$birdlife_pool_size < 310,]
birdlife_city_data_300[order(birdlife_city_data_300$response), c("name", "response")]
merlin_city_data_300 <- merlin_city_data[merlin_city_data$merlin_pool_size > 290 & merlin_city_data$merlin_pool_size < 310,]
merlin_city_data_300[order(merlin_city_data_300$response), c("name", "response")]
ggplot(city_data_subset, aes(x = label, y = response, color = label)) +
geom_label_repel(aes(label = biome_name), size = 3) + geom_point() +
theme_bw() + theme(legend.position = "none") + xlab("Pool size (Pool)") + ylab("Random Effect Response")
Warning: ggrepel: 22 unlabeled data points (too many overlaps). Consider increasing max.overlaps
table(city_data$biome_name)
Boreal Forests/Taiga Deserts & Xeric Shrublands Flooded Grasslands & Savannas
1 11 2
Mangroves Mediterranean Forests, Woodlands & Scrub Montane Grasslands & Shrublands
2 15 2
Temperate Broadleaf & Mixed Forests Temperate Conifer Forests Temperate Grasslands, Savannas & Shrublands
32 2 5
Tropical & Subtropical Coniferous Forests Tropical & Subtropical Dry Broadleaf Forests Tropical & Subtropical Grasslands, Savannas & Shrublands
2 14 5
Tropical & Subtropical Moist Broadleaf Forests
44
summary(glm(response ~ relevel(biome_name, ref = "Temperate Broadleaf & Mixed Forests") + merlin_pool_size, merlin_city_data, family = "gaussian"))
Call:
glm(formula = response ~ relevel(biome_name, ref = "Temperate Broadleaf & Mixed Forests") +
merlin_pool_size, family = "gaussian", data = merlin_city_data)
Deviance Residuals:
Min 1Q Median 3Q Max
-8.0835 -1.9045 -0.2273 1.7119 16.1025
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.885752 1.000848 5.881 3.60e-08 ***
relevel(biome_name, ref = "Temperate Broadleaf & Mixed Forests")Boreal Forests/Taiga -5.189947 3.612126 -1.437 0.1533
relevel(biome_name, ref = "Temperate Broadleaf & Mixed Forests")Deserts & Xeric Shrublands 1.434085 1.257137 1.141 0.2562
relevel(biome_name, ref = "Temperate Broadleaf & Mixed Forests")Flooded Grasslands & Savannas -2.670900 2.586224 -1.033 0.3038
relevel(biome_name, ref = "Temperate Broadleaf & Mixed Forests")Mangroves 5.849655 2.586319 2.262 0.0255 *
relevel(biome_name, ref = "Temperate Broadleaf & Mixed Forests")Mediterranean Forests, Woodlands & Scrub -0.079682 1.110329 -0.072 0.9429
relevel(biome_name, ref = "Temperate Broadleaf & Mixed Forests")Montane Grasslands & Shrublands 0.324356 2.601094 0.125 0.9010
relevel(biome_name, ref = "Temperate Broadleaf & Mixed Forests")Temperate Conifer Forests 1.206379 2.590705 0.466 0.6423
relevel(biome_name, ref = "Temperate Broadleaf & Mixed Forests")Temperate Grasslands, Savannas & Shrublands 2.811650 1.721678 1.633 0.1050
relevel(biome_name, ref = "Temperate Broadleaf & Mixed Forests")Tropical & Subtropical Coniferous Forests 5.225315 2.589895 2.018 0.0458 *
relevel(biome_name, ref = "Temperate Broadleaf & Mixed Forests")Tropical & Subtropical Dry Broadleaf Forests 1.775978 1.150258 1.544 0.1252
relevel(biome_name, ref = "Temperate Broadleaf & Mixed Forests")Tropical & Subtropical Grasslands, Savannas & Shrublands 2.889845 1.706685 1.693 0.0929 .
relevel(biome_name, ref = "Temperate Broadleaf & Mixed Forests")Tropical & Subtropical Moist Broadleaf Forests 0.567393 0.845207 0.671 0.5033
merlin_pool_size -0.024120 0.003215 -7.503 1.09e-11 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for gaussian family taken to be 12.59017)
Null deviance: 2469.6 on 136 degrees of freedom
Residual deviance: 1548.6 on 123 degrees of freedom
AIC: 751.03
Number of Fisher Scoring iterations: 2
summary(glm(response ~ relevel(biome_name, ref = "Temperate Broadleaf & Mixed Forests") + birdlife_pool_size, birdlife_city_data, family = "gaussian"))
Call:
glm(formula = response ~ relevel(biome_name, ref = "Temperate Broadleaf & Mixed Forests") +
birdlife_pool_size, family = "gaussian", data = birdlife_city_data)
Deviance Residuals:
Min 1Q Median 3Q Max
-5.1393 -1.3900 -0.1492 0.9716 9.4609
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.610738 0.717523 3.639 0.000402 ***
relevel(biome_name, ref = "Temperate Broadleaf & Mixed Forests")Boreal Forests/Taiga -3.270154 2.364009 -1.383 0.169076
relevel(biome_name, ref = "Temperate Broadleaf & Mixed Forests")Deserts & Xeric Shrublands 0.847287 0.821355 1.032 0.304297
relevel(biome_name, ref = "Temperate Broadleaf & Mixed Forests")Flooded Grasslands & Savannas -1.251925 1.697245 -0.738 0.462149
relevel(biome_name, ref = "Temperate Broadleaf & Mixed Forests")Mangroves 2.313444 1.719089 1.346 0.180862
relevel(biome_name, ref = "Temperate Broadleaf & Mixed Forests")Mediterranean Forests, Woodlands & Scrub -0.052808 0.730027 -0.072 0.942452
relevel(biome_name, ref = "Temperate Broadleaf & Mixed Forests")Montane Grasslands & Shrublands 1.774103 1.761046 1.007 0.315713
relevel(biome_name, ref = "Temperate Broadleaf & Mixed Forests")Temperate Conifer Forests 1.175044 1.696677 0.693 0.489894
relevel(biome_name, ref = "Temperate Broadleaf & Mixed Forests")Temperate Grasslands, Savannas & Shrublands 1.578314 1.120724 1.408 0.161565
relevel(biome_name, ref = "Temperate Broadleaf & Mixed Forests")Tropical & Subtropical Coniferous Forests 2.969675 1.708484 1.738 0.084679 .
relevel(biome_name, ref = "Temperate Broadleaf & Mixed Forests")Tropical & Subtropical Dry Broadleaf Forests 2.104071 0.770869 2.729 0.007274 **
relevel(biome_name, ref = "Temperate Broadleaf & Mixed Forests")Tropical & Subtropical Grasslands, Savannas & Shrublands 0.669519 1.183055 0.566 0.572477
relevel(biome_name, ref = "Temperate Broadleaf & Mixed Forests")Tropical & Subtropical Moist Broadleaf Forests 0.561698 0.634580 0.885 0.377803
birdlife_pool_size -0.010901 0.002526 -4.315 3.25e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for gaussian family taken to be 5.414085)
Null deviance: 865.45 on 136 degrees of freedom
Residual deviance: 665.93 on 123 degrees of freedom
AIC: 635.41
Number of Fisher Scoring iterations: 2
unique(city_data$biome_name)
[1] Tropical & Subtropical Grasslands, Savannas & Shrublands Montane Grasslands & Shrublands Tropical & Subtropical Moist Broadleaf Forests
[4] Tropical & Subtropical Dry Broadleaf Forests Mediterranean Forests, Woodlands & Scrub Temperate Broadleaf & Mixed Forests
[7] Temperate Grasslands, Savannas & Shrublands Flooded Grasslands & Savannas Deserts & Xeric Shrublands
[10] Tropical & Subtropical Coniferous Forests Mangroves Boreal Forests/Taiga
[13] Temperate Conifer Forests
13 Levels: Boreal Forests/Taiga Deserts & Xeric Shrublands Flooded Grasslands & Savannas Mangroves Mediterranean Forests, Woodlands & Scrub ... Tropical & Subtropical Moist Broadleaf Forests
summary(glm(response ~ biome_vegetation + biome_location + biome_climate + merlin_pool_size, tmp, family = "gaussian"))
Call:
glm(formula = response ~ biome_vegetation + biome_location +
biome_climate + merlin_pool_size, family = "gaussian", data = tmp)
Deviance Residuals:
Min 1Q Median 3Q Max
-8.0908 -1.9288 -0.1578 1.5228 16.1552
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.207549 4.010921 -0.301 0.7639
biome_vegetationGrassland & Shrublands 0.897658 1.393377 0.644 0.5206
biome_locationDesert 7.462201 4.558021 1.637 0.1041
biome_locationGlobal 4.603752 5.033288 0.915 0.3621
biome_locationMediterranean 5.068451 3.681711 1.377 0.1711
biome_locationMontane 4.526499 4.588225 0.987 0.3258
biome_locationTemperate 5.447927 3.616762 1.506 0.1345
biome_locationTropical & Subtropical 8.707133 3.833935 2.271 0.0248 *
biome_climateNormal 1.813755 1.716024 1.057 0.2926
biome_climateWet -1.210677 1.091495 -1.109 0.2695
merlin_pool_size -0.023574 0.003203 -7.360 2.09e-11 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for gaussian family taken to be 12.65148)
Null deviance: 2469.6 on 136 degrees of freedom
Residual deviance: 1594.1 on 126 degrees of freedom
AIC: 749
Number of Fisher Scoring iterations: 2
summary(glm(response ~ biome_name + rainfall_monthly_min * merlin_pool_size, tmp, family = "gaussian"))
Call:
glm(formula = response ~ biome_name + rainfall_monthly_min *
merlin_pool_size, family = "gaussian", data = tmp)
Deviance Residuals:
Min 1Q Median 3Q Max
-8.1207 -2.0357 -0.2982 1.7386 16.9663
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.7651540 3.7445952 -0.471 0.63821
biome_nameDeserts & Xeric Shrublands 6.9199788 3.7166807 1.862 0.06505 .
biome_nameFlooded Grasslands & Savannas 3.1663312 4.3217462 0.733 0.46519
biome_nameMangroves 11.4058602 4.3149495 2.643 0.00930 **
biome_nameMediterranean Forests, Woodlands & Scrub 5.6330661 3.6528049 1.542 0.12566
biome_nameMontane Grasslands & Shrublands 5.5893414 4.3513235 1.285 0.20142
biome_nameTemperate Broadleaf & Mixed Forests 4.8242325 3.5815194 1.347 0.18050
biome_nameTemperate Conifer Forests 6.3633304 4.3224710 1.472 0.14358
biome_nameTemperate Grasslands, Savannas & Shrublands 7.7518254 3.8814809 1.997 0.04805 *
biome_nameTropical & Subtropical Coniferous Forests 10.7588673 4.3372299 2.481 0.01449 *
biome_nameTropical & Subtropical Dry Broadleaf Forests 7.4313934 3.6779487 2.021 0.04554 *
biome_nameTropical & Subtropical Grasslands, Savannas & Shrublands 8.6265332 3.8689336 2.230 0.02761 *
biome_nameTropical & Subtropical Moist Broadleaf Forests 5.8902326 3.5806727 1.645 0.10256
rainfall_monthly_min 0.0846540 0.0403496 2.098 0.03798 *
merlin_pool_size -0.0169689 0.0050538 -3.358 0.00105 **
rainfall_monthly_min:merlin_pool_size -0.0002442 0.0001253 -1.949 0.05367 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for gaussian family taken to be 12.34648)
Null deviance: 2469.6 on 136 degrees of freedom
Residual deviance: 1493.9 on 121 degrees of freedom
AIC: 750.11
Number of Fisher Scoring iterations: 2
library(boot)
results <- boot(data=merlin_city_data, statistic=rsq, R=1000, formula=response ~ biome_name + merlin_pool_size)
boot.ci(results, type="bca")
BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 1000 bootstrap replicates
CALL :
boot.ci(boot.out = results, type = "bca")
Intervals :
Level BCa
95% ( 0.2256, 0.4651 )
Calculations and Intervals on Original Scale
Some BCa intervals may be unstable
results
ORDINARY NONPARAMETRIC BOOTSTRAP
Call:
boot(data = merlin_city_data, statistic = rsq, R = 1000, formula = response ~
biome_name + merlin_pool_size)
Bootstrap Statistics :
original bias std. error
t1* 0.3729407 0.03398538 0.06274843
results <- boot(data=merlin_city_data, statistic=rsq, R=1000, formula=response ~ biome_name + rainfall_monthly_min * merlin_pool_size)
boot.ci(results, type="bca")
Warning in norm.inter(t, adj.alpha) :
extreme order statistics used as endpoints
BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 1000 bootstrap replicates
CALL :
boot.ci(boot.out = results, type = "bca")
Intervals :
Level BCa
95% ( 0.2317, 0.4865 )
Calculations and Intervals on Original Scale
Warning : BCa Intervals used Extreme Quantiles
Some BCa intervals may be unstable
results
ORDINARY NONPARAMETRIC BOOTSTRAP
Call:
boot(data = merlin_city_data, statistic = rsq, R = 1000, formula = response ~
biome_name + rainfall_monthly_min * merlin_pool_size)
Bootstrap Statistics :
original bias std. error
t1* 0.3950763 0.04527929 0.06933429
results <- boot(data=merlin_city_data, statistic=rsq, R=1000, formula=response ~ merlin_pool_size)
boot.ci(results, type="bca")
BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 1000 bootstrap replicates
CALL :
boot.ci(boot.out = results, type = "bca")
Intervals :
Level BCa
95% ( 0.1645, 0.3803 )
Calculations and Intervals on Original Scale
results
ORDINARY NONPARAMETRIC BOOTSTRAP
Call:
boot(data = merlin_city_data, statistic = rsq, R = 1000, formula = response ~
merlin_pool_size)
Bootstrap Statistics :
original bias std. error
t1* 0.275204 0.003607655 0.05538602